*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   SOEP-IAB Data 
*
*===============================================================================


********************************************************************************
*							Coworker Wage Changes 							   *
********************************************************************************

* Close any log left open by an earlier run, then open this script's log.
* The log path is quoted, like the "$orig/..." and "$data/..." paths used
* further down, so that a ${log} directory whose name contains spaces does
* not break the command.
cap log close
log using "${log}/2_coworker_wagechanges.log", replace
* Fixed seed: the runiform() draws below (random tie-breaking, split samples)
* only reproduce if the seed and the sequence of draws stay the same.
set seed 6000

*** looping over restrictions on coworker wage changes (each restriction explained below)

foreach restriction in none unemp sameocc sameedu sameage sameinc{

	* echo the restriction handled in this pass of the loop
	display "`restriction'"

	* translate the restriction into the suffix used in the output file name
	if "`restriction'"=="none" {
		* no restrictions, except the coworkers must be employed fulltime before and after move
		local suffix ""
	}
	else if "`restriction'"=="unemp" {
		* the move must be intermediated by an unemployment spell
		local suffix "_unemp"
	}
	else if "`restriction'"=="sameocc" {
		* the move must be within-occupation, and the mover must have the same occupation as the SOEP individual
		local suffix "_socc"
	}
	else if "`restriction'"=="sameedu" {
		* the mover must have the same education level as the SOEP individual
		local suffix "_sedu"
	}
	else if "`restriction'"=="sameage" {
		* the mover must be in the same age category as the SOEP individual
		local suffix "_sage"
	}
	else if "`restriction'"=="sameinc" {
		* the mover must be in the same income quintile as the SOEP individual
		local suffix "_sinc"
	}

	cap noisily {
	* load only the variables needed below, for spells that start in 2015-2019
	use prs_id betnr ieb_beg_epi ieb_end_epi ieb_tag_entg ieb_quellverf_id ///
		ieb_beruf_kons_num berufstellg_imp ieb_dba_id geb_dat wz08_kons_num ///
		using "$orig/coworker_fullhist_IEB.dta" ///
		if inrange(year(ieb_beg_epi),2015,2019), clear

	* calendar year in which the spell starts
	generate jahr = year(ieb_beg_epi)

	*** derived variables for the same-occupation / same-education / same-age /
	*** same-income / fulltime-to-fulltime restrictions

	* occupation group: the occupation code with its last four digits cut off
	generate occ1 = floor(ieb_beruf_kons_num/10000)
	summarize occ1, detail

	* education: 1 = no education, 2 = vocational education,
	* 3 = university education (codes 11 and 12), 4 = anything else incl. missing
	rename ieb_dba_id ausbildung
	generate educ = cond(ausbildung==1, 1, ///
		cond(ausbildung==2, 2, ///
		cond(inlist(ausbildung,11,12), 3, 4)))

	* quintiles of the daily wage, computed over all loaded spells
	xtile wage_quintile = ieb_tag_entg, nquantiles(5)

	* age in the spell's start year, grouped into ten-year bands:
	* 1 = under 30, 2 = 30-39, 3 = 40-49, 4 = 50-59, 5 = 60-69, 6 = over 69, 7 = age missing
	generate yob = year(geb_dat)
	generate age = jahr-yob
	generate agecat = cond(age<30, 1, ///
		cond(inrange(age,30,39), 2, ///
		cond(inrange(age,40,49), 3, ///
		cond(inrange(age,50,59), 4, ///
		cond(inrange(age,60,69), 5, ///
		cond(age>69 & age!=., 6, ///
		cond(age==., 7, .)))))))

	* fulltime indicator: negative status codes are set to missing first;
	* codes 8, 9 and 21 count as not fulltime, a missing code stays missing
	tabulate berufstellg_imp, missing
	replace berufstellg_imp = . if berufstellg_imp<0
	generate fulltime = cond(berufstellg_imp==., ., !inlist(berufstellg_imp,8,9,21))
	tabulate fulltime, missing

	* merging on the SOEP ID variable
	* (getlink is not defined in this file; the code further down uses the
	* variables pid and _merge after this call)
	getlink, mergevar(prs_id)
	

	
	*** saving dataset that is later used by 5_machinelearning
	* cleaned IAB variables, one row per SOEP individual; written only in the
	* unrestricted ("none") pass, inside preserve/restore so the spell data survive
	if "`restriction'"=="none" {
		preserve
			* linked rows only: nonmissing SOEP ID and matched in the merge
			keep if pid!=. & _merge==3
			* rows from each person's most recent year, and only when that year is 2019
			egen maxyear = max(jahr), by(pid)
			keep if jahr==maxyear & jahr==2019
			* if a person still has several rows, keep one of them at random;
			* the draw is reproducible because the seed is fixed at the top of the script
			gen random_sort_var = runiform()
			sort pid random_sort_var
			by pid: keep if _n==1
			drop random_sort_var
			* cleaning industry variable: codes above 0 and below 5000 form group 1,
			* codes 5000-9999 group 2; all later groups are ranges of floor(code/1000)
			gen wz_prefix = floor(wz08_kons_num/1000)
			gen industry_2 = .
			replace industry_2 = 1 if wz08_kons_num>0 & wz08_kons_num<5000
			replace industry_2 = 2 if inrange(wz08_kons_num,5000,9999)
			replace industry_2 = 3 if inrange(wz_prefix,10,34)
			replace industry_2 = 4 if wz_prefix==35
			replace industry_2 = 5 if inrange(wz_prefix,36,39)
			replace industry_2 = 6 if inrange(wz_prefix,41,43)
			replace industry_2 = 7 if inrange(wz_prefix,45,47)
			replace industry_2 = 8 if inrange(wz_prefix,49,53)
			replace industry_2 = 9 if inrange(wz_prefix,55,56)
			replace industry_2 = 10 if inrange(wz_prefix,58,63)
			replace industry_2 = 11 if inrange(wz_prefix,64,66)
			replace industry_2 = 12 if wz_prefix==68
			replace industry_2 = 13 if inrange(wz_prefix,69,75)
			replace industry_2 = 14 if inrange(wz_prefix,77,82)
			replace industry_2 = 15 if wz_prefix==84
			replace industry_2 = 16 if wz_prefix==85
			replace industry_2 = 17 if inrange(wz_prefix,86,88)
			replace industry_2 = 18 if inrange(wz_prefix,90,93)
			replace industry_2 = 19 if inrange(wz_prefix,94,96)
			replace industry_2 = 20 if inrange(wz_prefix,97,98)
			replace industry_2 = 21 if wz_prefix==99
			* only the person ID and the cleaned characteristics are written out
			keep pid occ1 educ wage_quintile agecat industry_2
			save "$data/gsoep_adminchars.dta", replace
		restore
	}
	* the industry code is not used in any of the steps below
	cap noisily drop wz08_kons_num

	*** beginning coworker wage change calculations

	* discard rows with _merge==2 and, so that SOEP individuals are not
	* included among the movers, every row that carries a SOEP ID
	drop if _merge==2 | pid!=.
	drop _merge pid
	keep if inrange(jahr,2015,2019)

	if "`restriction'"=="unemp" {
		* employment and UI spells
		keep if betnr!=. | (ieb_quellverf_id==2 & ieb_tag_entg>0 & ieb_tag_entg!=.)
	}
	else {
		* only employment spells
		keep if betnr!=.
	}
	
	
	** keep one main spell per person and year (the one with the highest compensation)
	** for every restriction except "unemp", which is handled separately
	if "`restriction'"!="unemp"{ 
		* total earnings of the spell: duration in days (inclusive of both end points) times the daily wage
		gen spell_earnings = (ieb_end_epi-ieb_beg_epi+1)*ieb_tag_entg
		drop if spell_earnings == . | spell_earnings<0
		* random tie-breaker: gives reproducible results (given the fixed seed) if two spells have the same earnings
		gen random_sort_var = runiform()	
		* negated earnings so that an ascending sort puts the highest-earning spell first
		gen sort_spell_earnings = -spell_earnings
		* rank spells within person-year: highest earnings first, ties in random order
		quietly bys prs_id jahr (sort_spell_earnings random_sort_var): gen priority_spell = _n
		drop sort_spell_earnings random_sort_var
		keep if priority_spell==1
	}
	* "unemp" restriction: flag employment spells that are followed by a UI spell
	* (ieb_quellverf_id==2), then keep employment spells only
	if "`restriction'"=="unemp"{
		* part 1: the UI spells, set aside unchanged
		preserve
			keep if ieb_quellverf_id==2
			save "$data/coworker_wagechanges_tempfile1.dta", replace
		restore
		* part 2: employment spells, reduced to one main spell per person and year
		preserve
			keep if betnr!=.
			* total earnings of the spell: duration in days (inclusive) times the daily wage
			gen spell_earnings = (ieb_end_epi-ieb_beg_epi+1)*ieb_tag_entg
			drop if spell_earnings == . | spell_earnings<0
			* negated earnings so that an ascending sort puts the highest-earning spell first
			gen sort_spell_earnings = -spell_earnings
			* random tie-breaker: gives reproducible results (given the fixed seed) if two spells have the same earnings
			gen random_sort_var = runiform() 
			* NOTE(review): the bys on the next line sorts again on its own keys, so this sort looks redundant;
			* kept as is because removing it has not been checked against the published results
			sort sort_spell_earnings random_sort_var
			bys prs_id jahr (sort_spell_earnings random_sort_var): gen priority_spell = _n
			drop sort_spell_earnings random_sort_var
			keep if priority_spell==1
			save "$data/coworker_wagechanges_tempfile2.dta", replace
		restore
		* stack UI spells and main employment spells
		* NOTE(review): the two intermediate files stay in $data after the run
		use "$data/coworker_wagechanges_tempfile1.dta", clear
		append using "$data/coworker_wagechanges_tempfile2.dta"
		* order each person's spells by start and end date; spells with identical dates are ordered at random
		* (reproducible given the fixed seed)
		gen random_sort_var = runiform() 		
		sort prs_id ieb_beg_epi ieb_end_epi random_sort_var
		drop random_sort_var
		* dummy marks UI spells; maxdummy marks persons with at least one UI spell
		gen dummy = ieb_quellverf_id==2
		egen maxdummy = max(dummy), by(prs_id)
		di _N
		keep if maxdummy==1
		* drop a UI spell that begins 5 or more days before the end of the same person's preceding spell
		drop if dummy==1 & prs_id==prs_id[_n-1] & ieb_beg_epi-ieb_end_epi[_n-1]<=-5
		* keepdummy = 1 for an employment spell whose next row (same person) is a UI spell that begins
		* fewer than 84 days after, and no more than 4 days before, the end of the employment spell
		gen keepdummy = 0
		replace keepdummy = 1 if prs_id==prs_id[_n+1] & betnr!=. & ieb_quellverf_id[_n+1]==2 & ///
			(ieb_beg_epi[_n+1]-ieb_end_epi<84) & (ieb_beg_epi[_n+1]-ieb_end_epi>-5)
		* from here on only employment spells are needed; keepdummy carries the UI information
		keep if betnr!=.
		di "unemployment restriction setup succeeded"		
	}
	
	* restricting to relevant variable list: a common core, plus keepdummy for
	* "unemp", plus the matching characteristics for the same-* restrictions
	local keepvars "prs_id betnr jahr ieb_tag_entg fulltime"
	if "`restriction'"=="unemp" local keepvars "`keepvars' keepdummy"
	if !inlist("`restriction'","none","unemp") local keepvars "`keepvars' occ1 educ wage_quintile agecat"
	keep `keepvars'

	* winsorized log and level versions of the daily wage
	generate ln_wage_non_winsorized = ln(ieb_tag_entg)
	winsor ln_wage_non_winsorized, p(0.02) generate(ln_wage)
	generate wage_non_winsorized = ieb_tag_entg
	winsor wage_non_winsorized, p(0.02) generate(wage)
	summarize wage_non_winsorized
	summarize ln_wage_non_winsorized

	* a row counts as a move when the person's next row (ordered by year) is
	* at a different establishment and belongs to the following calendar year
	local is_move "(betnr[_n+1]!=betnr) & (jahr[_n+1]==jahr+1)"

	* generating mover wage changes by origin firm: next row's log wage minus this row's
	display "creating delta_ln_wage_mover"
	bysort prs_id (jahr): generate delta_ln_wage_mover = ln_wage[_n+1]-ln_wage if `is_move'
	summarize delta_ln_wage_mover

	* checking full-time status at next job
	bysort prs_id (jahr): generate fulltimenext = fulltime[_n+1] if `is_move'
	
	* setting up the "moving within same occupation" restriction:
	* record the occupation in the person's next row (ordered by year)
	if "`restriction'"=="sameocc" {
		display "`restriction'"
		summarize delta_ln_wage_mover
		bysort prs_id (jahr): generate nextocc = occ1[_n+1]
	}

	* only rows with a defined mover wage change are used from here on
	display "keeping nonmissing wage change observations"
	display _N
	drop if delta_ln_wage_mover==.

	* imposing the occupation restriction: the origin occupation must be
	* nonmissing and equal to the occupation in the next row
	if "`restriction'"=="sameocc" {
		display "imposing `restriction'"
		display _N
		keep if occ1!=. & nextocc==occ1
		display _N
	}
	
	if "`restriction'"=="unemp" {
		* saving interm. data set for AKM figures
		* (written before the EUE and fulltime filters below are applied)
		save "$data/akm_eue.dta", replace

		* imposing the unemployment restriction
		display "`restriction'"
		display "checking share of FT-to-FT moves that are EUE"
		tabulate keepdummy if fulltime==1 & fulltimenext==1
		tabulate keepdummy
		keep if keepdummy==1
		drop keepdummy
		display _N
	}
	
	* only moves from a fulltime job to a fulltime job are kept; the output
	* file name suffix gets "FT" appended to reflect this
	display "keeping fulltime to fulltime transitions"
	local suffix "`suffix'FT"
	display _N
	tabulate fulltime fulltimenext
	drop if fulltime!=1 | fulltimenext!=1

	* saving datasets for machine learning
	* (mover-level data, "unemp" restriction only)
	if "`restriction'"=="unemp" {
		keep prs_id betnr jahr delta_ln_wage_mover ln_wage fulltime fulltimenext
		summarize delta_ln_wage_mover
		save "$data/machinelearning_unempFT.dta", replace
	}
	
	* generating split-sample variables for IV specs: within each establishment
	* the movers are put in random order and the first half gets r1 = 1, the
	* rest r1 = 0; r1 is missing for establishments with 3 or fewer movers
	gen random_sort_var = runiform()
	bysort betnr (random_sort_var): gen r1 = _n/_N<=0.5 if _N>3
	drop random_sort_var

	* remove a previous version of the output file, if there is one
	cap noisily erase "$data/coworker_wage_changes`suffix'.dta"

	* cells to aggregate over: the establishment, crossed with the matching
	* characteristic for the same-* restrictions
	local collapselist "betnr"
	if "`restriction'"=="sameocc" local collapselist "betnr occ1"
	if "`restriction'"=="sameage" local collapselist "betnr agecat"
	if "`restriction'"=="sameedu" local collapselist "betnr educ"
	if "`restriction'"=="sameinc" local collapselist "betnr wage_quintile"

	* cell statistics of the mover wage change, once for moves from 2015 on
	* and once for moves from 2017 on
	gen tempdummy = 1
	foreach year in 2015 2017 {
		local window "jahr>=`year'"
		* mean and median over all movers in the cell
		foreach stat in mean med {
			local fcn = cond("`stat'"=="med", "median", "mean")
			bysort `collapselist': egen `stat'_delta_ln_wage_mover_`year' = `fcn'(delta_ln_wage_mover) if `window'
		}
		* mean and median within each random half (r1==0, r1==1)
		foreach stat in mean med {
			local fcn = cond("`stat'"=="med", "median", "mean")
			foreach half in 0 1 {
				bysort `collapselist': egen `stat'_delta_ln_wage_mover_r`half'_`year' = `fcn'(delta_ln_wage_mover) if `window'&r1==`half'
			}
		}
		* standard deviation and number of movers in the cell
		bysort `collapselist': egen sd_delta_ln_wage_mover_`year' = sd(delta_ln_wage_mover) if `window'
		bysort `collapselist': egen num_movers_`year' = sum(tempdummy) if `window'
	}
	* only the cell identifiers and the cell statistics are needed from here on
	keep `collapselist' mean_* med_* num_movers_* sd_delta*

	* collapsing to the firm level: one row per cell, holding the cell
	* averages of the statistics computed above
	collapse (mean) mean_* med_* num_movers_* sd_delta*, by(`collapselist')
	foreach yr in 2015 2017 {
		cap noisily groups num_movers_`yr', select(f>20)
	}
	sort betnr
	compress
	drop if betnr==.
	summarize mean_* med_*

	* implementing Empirical Bayes correction: the standard error of the cell
	* mean is the cell standard deviation over the square root of the mover count
	foreach yr in 2015 2017 {
		generate se_ln`yr' = sd_delta_ln_wage_mover_`yr'/sqrt(num_movers_`yr')
		ebayes mean_delta_ln_wage_mover_`yr' se_ln`yr', gen(ebayes_delta_ln_wage_mover_`yr')
		summarize ebayes_delta_ln_wage_mover_`yr'
	}

	* one output file per restriction, named by the suffix built above
	save "$data/coworker_wage_changes`suffix'.dta", replace
	display "`restriction' completed successfully"
	}
} 	
	




* close the log (no error if none is open) and empty the data in memory
capture log close
clear